**************************************************************
* Plot before/after NM, change in risky behavior (raw data)
**************************************************************

** update on 10/20/20: generate black-white figures


clear 
clear matrix
set memory 1000m
set more off
cap log close

cd "/Users/..."

global do_file="‎⁨/Users/.../do_file"
global log_file="/Users/.../log_file"
global raw_data="/Users/.../raw_data⁩⁩"
global working_data="/Users/.../working_data"
global results="/Users/.../results"
global results="/Users/.../results/102020"



******************************************************************************
* After a NM (measured by total_prev_hard_brake), change in risky behavior? 
******************************************************************************

** first construct spell data
use working_data/clean_day_level_geo_weather_081519.dta, clear

sort user_id trip_start_date
bysort user_id: gen day_index=_n
bysort user_id: gen total_days=_N

tsset user_id day_index

tab total_prev_hard_brake, m


gen day_after_nm=0 if total_prev_hard_brake>0 
replace day_after_nm=l.day_after_nm+gap_time if day_after_nm==. 

gen day_before_nm=0 if total_prev_hard_brake>0
gsort user_id -trip_start_date

replace day_before_nm=day_before_nm[_n-1]-gap_time[_n-1] if day_before_nm==. & user_id==user_id[_n-1]

sort user_id trip_start_date


tab day_after_nm, m
tab day_before_nm, m



** find near-miss that is more "isolated"

gen temp=1 if total_prev_hard_brake>0

sort user_id temp trip_start_date

gen trip_start_date_num=date(trip_start_date, "YMD")
gen temp1=trip_start_date_num if day_index==1
bysort user_id: egen first_drive_date=total(temp1)

gen temp2=trip_start_date_num if day_index==total_days
bysort user_id: egen last_drive_date=total(temp2)

* the number of days between this nm to next nm
gen days_before_next_nm=trip_start_date_num[_n+1]-trip_start_date_num if user_id[_n+1]==user_id & temp==1 & temp[_n+1]==1
* the number of days between last nm to this nm
gen days_after_last_nm=trip_start_date_num-trip_start_date_num[_n-1] if user_id==user_id[_n-1] & temp==1 & temp[_n-1]==1


drop temp temp1 temp2


gsort user_id -trip_start_date
replace days_after_last_nm=trip_start_date_num-first_drive_date if days_after_last_nm==. & day_before_nm==0  // this is to deal with first nm
replace days_after_last_nm=days_after_last_nm[_n-1] if days_after_last_nm==. & days_after_last_nm[_n-1]~=. & user_id==user_id[_n-1] 

replace days_before_next_nm=last_drive_date-trip_start_date_num if days_before_next_nm==. & day_before==0 // this is to deal with last nm
replace days_before_next_nm=days_before_next_nm[_n-1] if days_before_next_nm==. & days_before_next_nm[_n-1]~=. & user_id==user_id[_n-1]



sort user_id trip_start_date


************************************************
** now generate average values of risky behavior
************************************************ 


* number of days after nm
gen day_after_nm_new=day_after_nm if day_after_nm<=10

egen phone_use_mean_after=mean(total_phone_use), by(day_after_nm_new)
egen distance_mean_after = mean(distance), by(day_after_nm_new)
egen duration_mean_after = mean(duration), by(day_after_nm_new)
egen speed_mean_after=mean(speed), by(day_after_nm_new)
egen drive_at_night_mean_after = mean(drive_at_night), by(day_after_nm_new)
egen highway_mean_after = mean(highway), by(day_after_nm_new)


preserve

bysort day_after_nm_new: keep if _n==1
drop if day_after_nm_new==.
keep day_after_nm_new phone_use_mean_after-highway_mean_after 

rename day_after_nm_new num_of_days
rename phone_use_mean_after phone_use
rename distance_mean_after distance
rename duration_mean_after duration
rename speed_mean_after speed
rename drive_at_night_mean_after drive_at_night
rename highway_mean_after highway

save working_data/day_after_nm.dta, replace

restore 


* try select time window (-30, 30), (-20, 20), (-10, 10)


forvalues i=10(10)30{


* number of days after nm
gen day_after_nm_new_`i'=day_after_nm if day_after_nm<=10 & days_before_next_nm>=`i' & days_after_last_nm>=`i'

egen phone_use_mean_after_`i'=mean(total_phone_use), by(day_after_nm_new_`i')
egen distance_mean_after_`i'= mean(distance), by(day_after_nm_new_`i')
egen duration_mean_after_`i'= mean(duration), by(day_after_nm_new_`i')
egen speed_mean_after_`i'=mean(speed), by(day_after_nm_new_`i')
egen drive_at_night_mean_after_`i'= mean(drive_at_night), by(day_after_nm_new_`i')
egen highway_mean_after_`i'= mean(highway), by(day_after_nm_new_`i')


* number of days before nm


gen day_before_nm_new_`i'=day_before_nm if day_before_nm<=0 & day_before_nm>=-10 & days_before_next_nm>=`i' & days_after_last_nm>=`i'

egen phone_use_mean_before_`i'=mean(total_phone_use), by(day_before_nm_new_`i')
egen distance_mean_before_`i'= mean(distance), by(day_before_nm_new_`i')
egen duration_mean_before_`i'= mean(duration), by(day_before_nm_new_`i')
egen speed_mean_before_`i'=mean(speed), by(day_before_nm_new_`i')
egen drive_at_night_mean_before_`i'= mean(drive_at_night), by(day_before_nm_new_`i')
egen highway_mean_before_`i'= mean(highway), by(day_before_nm_new_`i')



preserve

bysort day_after_nm_new_`i': keep if _n==1
drop if day_after_nm_new_`i'==. 

keep day_after_nm_new_`i' phone_use_mean_after_`i'-highway_mean_after_`i'

rename day_after_nm_new_`i' num_of_days
rename phone_use_mean_after_`i' phone_use_mean_`i'
rename distance_mean_after_`i' distance_mean_`i'
rename duration_mean_after_`i' duration_mean_`i'
rename speed_mean_after_`i' speed_mean_`i'
rename drive_at_night_mean_after_`i' drive_at_night_mean_`i'
rename highway_mean_after_`i' highway_mean_`i'

save working_data/day_after_nm_`i'.dta, replace

restore


preserve

bysort day_before_nm_new_`i': keep if _n==1
drop if day_before_nm_new_`i'==. 

keep day_before_nm_new_`i' phone_use_mean_before_`i'-highway_mean_before_`i'

rename day_before_nm_new_`i' num_of_days
rename phone_use_mean_before_`i' phone_use_mean_`i'
rename distance_mean_before_`i' distance_mean_`i'
rename duration_mean_before_`i' duration_mean_`i'
rename speed_mean_before_`i' speed_mean_`i'
rename drive_at_night_mean_before_`i' drive_at_night_mean_`i'
rename highway_mean_before_`i' highway_mean_`i'

drop if num_of_days==0

save working_data/day_before_nm_`i'.dta, replace

restore


}



forvalues i=10(10)30{

use working_data/day_before_nm_`i'.dta, clear
append using working_data/day_after_nm_`i'.dta
save working_data/day_nm_`i'.dta, replace

}


use working_data/day_nm_10.dta, clear
merge 1:1 num_of_days using working_data/day_nm_20.dta
drop _merge

merge 1:1 num_of_days using working_data/day_nm_30.dta
drop _merge




twoway line phone_use_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line phone_use_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("# of phone uses")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/phone_use_trend, replace)
graph export $results/figures/change_rb_NM_def1/phone_use_trend.png, replace



twoway line distance_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line distance_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("distance (km)")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/distance_trend, replace)
graph export $results/figures/change_rb_NM_def1/distance_trend.png, replace



twoway line duration_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line duration_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("duration (h)")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/duration_trend, replace)
graph export $results/figures/change_rb_NM_def1/duration_trend.png, replace
	
	
twoway line speed_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line speed_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("speed (km/h)")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/speed_trend, replace)
graph export $results/figures/change_rb_NM_def1/speed_trend.png, replace



twoway line drive_at_night_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line drive_at_night_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("drive at night")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/drive_at_night_trend, replace)
graph export $results/figures/change_rb_NM_def1/drive_at_night_trend.png, replace



twoway line highway_mean_20 num_of_days, color(black) lwidth(0.5) lpattern(solid)  xline(0, lwidth(0.3) lpattern(longdash) lcolor(black)) ///
|| line highway_mean_30 num_of_days, color(black) lwidth(0.5) lpattern(longdash) ///
xlabel(-10(2)10)  ytitle("mean") ylabel(,angle(horizontal)) xtitle("day since NM") title("# of highway uses")  ///
legend(order(1 2) label(1 "No NM within +/- 20 days") label(2 "No NM within +/- 30 days")) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/change_rb_NM_def1/highway_trend, replace)
graph export $results/figures/change_rb_NM_def1/highway_trend.png, replace



			  
grc1leg	$results/figures/change_rb_NM_def1/phone_use_trend.gph $results/figures/change_rb_NM_def1/distance_trend.gph $results/figures/change_rb_NM_def1/duration_trend.gph ///
			  $results/figures/change_rb_NM_def1/speed_trend.gph $results/figures/change_rb_NM_def1/drive_at_night_trend.gph $results/figures/change_rb_NM_def1/highway_trend.gph, r(2) ///
			  plotregion(color(white)) graphregion(color(white)) 
graph export $results/figures/change_rb_NM_def1/trend_all.png, replace	










**********************
* Plot Long-Run Effect
**********************


* load data from excel for IV spec (072020 results), add additional lags, NM def1


use $results/figures/plot_long_run.dta, clear




***********************
***********************
* Plot Long-Run Effect: combine the two graphs
***********************
***********************




twoway line phone_use drive_days, color(black)  || scatter phone_use drive_days, color(black) msymbol(s) msize(small)  ///
	|| line phone_use_2 drive_days, color(black) lpattern(longdash) || scatter phone_use_2 drive_days, color(black) msymbol(t) msize(small) ///
legend(order(1 3) label(1 "AR(1) model") label(3 "AR(2) model")) ///
ytitle("")   xtitle("# of driving days") title("# of phone uses", size(large)) xlabel(0(1)10) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/plot_long_run/phone_use_long_combine, replace)


twoway line distance drive_days, color(black) || scatter distance drive_days, color(black) msymbol(s) msize(small)  ///
	|| line distance_2 drive_days, color(black) lpattern(longdash) || scatter distance_2 drive_days, color(black) msymbol(t) msize(small) ///
legend(order(1 3) label(1 "AR(1) model") label(3 "AR(2) model")) ///
ytitle("")   xtitle("# of driving days") title("distance (km)", size(large)) xlabel(0(1)10) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/plot_long_run/distance_long_combine, replace)





twoway line duration drive_days, color(black) || scatter duration drive_days, color(black) msymbol(s) msize(small)  ///
legend(order(1 3) label(1 "AR(1) model") label(3 "AR(2) model")) ///
ytitle("")   xtitle("# of driving days") title("duration (h)", size(large)) xlabel(0(1)10) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/plot_long_run/duration_long_combine, replace)



twoway line highway drive_days, color(black) || scatter highway drive_days, color(black) msymbol(s) msize(small)  ///
	|| line highway_2 drive_days, color(black) lpattern(longdash) || scatter highway_2 drive_days, color(black) msymbol(t) msize(small) ///
legend(order(1 3) label(1 "AR(1) model") label(3 "AR(2) model")) ///
ytitle("")  xtitle("# of driving days") title("# of highway uses", size(large)) xlabel(0(1)10) ///
plotregion(color(white)) graphregion(color(white)) bgcolor(white) ///
saving($results/figures/plot_long_run/highway_long_combine, replace)



grc1leg	$results/figures/plot_long_run/phone_use_long_combine.gph $results/figures/plot_long_run/distance_long_combine.gph $results/figures/plot_long_run/duration_long_combine.gph ///
			  $results/figures/plot_long_run/highway_long_combine.gph, r(2)		plotregion(color(white)) graphregion(color(white))  			  
graph export $results/figures/plot_long_run/long_run_all_combine.png, replace	



